#' ------------------------------------------------------------------------------
#' Title: Bayesian Aldrich-McKelvey Model Analysis for CHES Data
#'
#' Description: This script generates all figures for the final version of the
#'              manuscript. It visualizes comparisons of different BAM model
#'              estimations, including regional and country-level analyses of
#'              alpha/beta parameters and their relationships with expert
#'              characteristics.

# Install and load required packages -------------------------------------------
if (!require("pacman")) install.packages("pacman")
p_load(tidyverse, rstan, hbamr, ggpubr)

# Load Data --------------------------------------------------------------------

# BAM Estimates
bam_fit <- readRDS('outputs/bam.rds')

# CHES Data
df_expert <- readRDS("outputs/df_global_expert.rds")
df_expert_party <- readRDS("outputs/df_global_expert_party.rds")
df_bam <- readRDS("outputs/df_bam.rds")

# Functions --------------------------------------------------------------------

# Bootstrap function for confidence intervals
boot <- function(data, iterations = 1000, grouping_vars) {
  # Initialize results list
  results <- list()
  
  # Loop for all iterations
  for (i in 1:iterations) {
    # Sample with replacement
    sampled_df <- data[sample(nrow(data), size = nrow(data), replace = TRUE), ]

    # Group by the specified variables and calculate means
    grouped_means <- sampled_df %>%
      group_by(across(all_of(grouping_vars))) %>%
      summarise(
        left = mean(party_left, na.rm = TRUE),
        center = mean(party_center, na.rm = TRUE),
        right = mean(party_right, na.rm = TRUE),
        .groups = "drop"
      )
    
    # Store this iteration's results
    results[[i]] <- grouped_means
  }
  
  # Combine all bootstrap iterations
  all_results <- bind_rows(results, .id = "iteration")
  
  return(all_results)
}

# Bootstraps to quantiles
boot_to_quan <- function(data, grouping_vars) {
  # Calculate quantiles for each group and measure
  summary_stats <- data %>%
    group_by(across(all_of(grouping_vars))) %>%
    summarise(
      left_lwr = quantile(left, 0.025),
      left_score = median(left),
      left_upr = quantile(left, 0.975),
      center_lwr = quantile(center, 0.025),
      center_score = median(center),
      center_upr = quantile(center, 0.975),
      right_lwr = quantile(right, 0.025),
      right_score = median(right),
      right_upr = quantile(right, 0.975),
      .groups = "drop"
    )
  
  # Pivot longer for plotting 
  plot_dataset <- summary_stats %>%
    pivot_longer(
      cols = -all_of(grouping_vars),
      names_to = c("position", "stat"),
      names_sep = "_",
      values_to = "value"
    ) %>%
    pivot_wider(
      names_from = stat,
      values_from = value
    ) %>%
    mutate(
      position = factor(
        position, 
        levels = c("left", "center", "right"),
        labels = c("Left", "Center", "Right")
      )
    )
  
  return(plot_dataset)
}

# Extract Positions
ex_fun <- function(data, quan) {
  df <- as.data.frame(rstan::extract(data, quan)) %>%
    sapply(., quantile, probs = c(0.025, 0.5, 0.975)) %>%
    t(.) %>%
    as.data.frame(.)
  return(df)
}

# Function for extracting distributions
post_dist <- function(bam_ob, co, group_v = country) {
  # Extract iterations from stanfit object
  iter <- t(as.data.frame(rstan::extract(bam_ob, co)))
  # Extract country or region IDs
  g_data <- bam_i[, if (group_v == 'country') 4 else 2]
  # Group by grouping variable and estimate mean
  mean_iter <- cbind(g_data, iter) %>% 
    as.data.frame() %>% 
    group_by(g_data) %>%
    summarise_all(mean)
  # Estimate quantiles
  names(mean_iter)[1] <- if (group_v == "country") "country_id" else "region_id"
  return(mean_iter)
}

# Function for posterior quantiles
post_mean <- function(bam_ob, co, group_v = country) {
  # Extract iterations from stanfit object
  iter <- t(as.data.frame(rstan::extract(bam_ob, co)))
  # Extract country or region IDs
  g_data <- df_expert[, if (group_v == 'country') 2 else 1]
  names(g_data) <- "group_v"
  # Group by grouping variable and estimate mean
  mean_iter <- cbind(g_data, iter) %>%
    group_by(group_v) %>%
    summarise_all(mean)
  # Estimate quantiles
  group_q <- apply(
    mean_iter[, c(2, ncol(mean_iter))],
    1,
    quantile, probs = c(0.025, .5, .975)
  )
  group_q_t <- t(group_q)
  est <- cbind(mean_iter[, 1], group_q_t)
  names(est) <- if (group_v == "country") {
    c("country_id", paste0(co, "_lwr"), paste0(co, "_scr"), paste0(co, "_upr"))
  } else {
    c("region_id", paste0(co, "_lwr"), paste0(co, "_scr"), paste0(co, "_upr"))
  }
  return(est)
}

# Between and within differences functions
bw_var <- function(y, x) {
  # Run ANOVA with simplified formula
  aov_result <- summary(aov(y ~ x))[[1]]
  
  # Extract variances
  between_var <- aov_result["x", "Mean Sq"]   # Mean Sq for region
  within_var <- aov_result["Residuals", "Mean Sq"]
  
  # Return as named list
  return(list(
    between = between_var,
    within = within_var
  ))
}

# Variance to quantiles 
var_to_quan <- function(data, aov_type) {
  data %>%
    pivot_longer(everything()) %>%
    mutate(value = as.numeric(value)) %>%
    group_by(name) %>%
    summarise(
      point = mean(value), 
      lwr = quantile(value, 0.025),
      upr = quantile(value, 0.975)
    ) %>%
    mutate(type = aov_type) %>%
    rename("variance" = "name")
}

# Theme for plots
theme_ches <- function() {
  theme(
    legend.position = "bottom", # Places the legend at the bottom
    legend.title = element_blank(), # Eliminate title
    legend.box.background = element_rect(colour = "black"), # Black border
    legend.background = element_blank(), # Transparent background
    legend.key = element_blank(), # Transparent legend keys
    panel.background = element_rect(fill = 'white', colour = 'white'), # White panel
    panel.grid.major = element_line(colour = "grey95"), # Light grey grid
    panel.grid.minor = element_line(colour = "grey95"), # Light grey grid
    axis.ticks.y = element_blank(), # Remove y-axis ticks
    axis.ticks.x = element_blank(), # Remove x-axis ticks
    strip.background = element_rect(fill = "white") # White facet labels
  )
}

# FIGURE 1 & 2: MEAN PLACEMENT VIGNETTES ---------------------------------------

# Region Names
df_expert <- df_expert %>%
  mutate(region = recode(
    as.character(region_id),
    '1' = 'Europe',
    '2' = 'Latin America',
    '3' = 'North America',
    '4' = 'Australia',
    '5' = 'Israel',
    '6' = 'North America'
  ))

# Set seed and iterations
set.seed(1)
iterations <- 1000

# Bootstrap and quantiles
region_boot <- boot(df_expert, iterations, "region")
region_quan <- boot_to_quan(region_boot, "region")
country_boot <- boot(df_expert, iterations, c("region", "country"))
country_quan <- boot_to_quan(country_boot, c("region", "country"))

# Figure 1: Region Averages
ggplot(region_quan, aes(y = score, x = position, color = region)) +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_errorbar(
    aes(ymax = lwr, ymin = upr), 
    width = 0.4,
    position = position_dodge(width = 0.5)
  ) +
  scale_y_continuous(limits = c(0, 10)) +
  scale_color_grey(start = 0, end = .8) +
  labs(x = 'Vignette Average Position', y = '') +
  theme_ches()
ggsave(
  "figures/fg1.tif",  
  width = 7,
  height = 5
)

# Figure A2: Country averages
ggplot(country_quan,
  aes(x = score, y = fct_reorder(country, region), color = position)) +
  geom_point(size = .3) +
  geom_errorbarh(aes(xmax = upr, xmin = lwr), height = 0.6) +
  scale_y_discrete(limits = rev) +
  scale_x_continuous(
    limits = c(0, 10), 
    breaks = c(0, 2.5, 5, 7.5, 10)
  ) +
  scale_color_grey(start = 0, end = .8) +
  labs(y = '', x = '') +
  theme_ches()
ggsave(
  "figures/fgA2.tif",  
  width = 9, 
  height = 6
)
rm(
  country_boot, country_quan, region_boot, region_quan, 
  boot, boot_to_quan, iterations
)

# FIGURE 2: HYPOTHETICAL Alphas and Betas --------------------------------------

# Alpha and Beta
alpha <- c('alpha>0', 'alpha==0', 'alpha<0')
beta <- c('beta<1', 'beta==1', 'beta>1', 'beta<0')
df <- merge(alpha, beta)
names(df) <- c('Alpha', 'Beta')

# Position
df$a <- c(0.5, -0.5, -1.5, 0, -1, -2, -0.5, -1.5, -2.5, 2, 1, 0)
df$b <- c(1.5, 0.5, -0.5, 2, 1, 0, 2.5, 1.5, 0.5, 0, -1, -2)

# Beta as factor
df$Beta <- factor(df$Beta, levels = c('beta<0', 'beta<1', 'beta==1', 'beta>1')) 
df$Alpha <- factor(df$Alpha, levels = c('alpha<0', 'alpha==0', 'alpha>0')) 

df %>% 
  filter(Beta != 'beta<0') %>% 
  pivot_longer(c(a, b)) %>%
  mutate(stimuli = rep(c(-1, 1), 9)) %>% 
  pivot_longer(c(value, stimuli), names_to = 'names2') %>%
  mutate(
    names2 = ifelse(names2 == 'stimuli', 'Stimuli', 'Perception'),
    y = ifelse(names2 == 'Stimuli', 0.5, -0.5), 
    names2 = factor(names2, levels = c('Stimuli', 'Perception'))
  ) %>% 
  ggplot(aes(x = value, y = y, color = names2)) +
  geom_point(aes(shape = names2), size = 2) +
  scale_y_continuous(limits = c(-1.5, 1.5)) +
  scale_color_grey(start = 0, end = .5) +
  facet_grid(Beta ~ Alpha, labeller = label_parsed) +
  theme_ches() +
  theme(strip.text.x = element_text(size = 12),
        strip.text.y = element_text(size = 12),
        axis.ticks.y = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.y = element_blank(),
        panel.background = element_rect(fill = 'transparent', colour = 'black'),
        strip.background = element_rect(fill = "transparent"))

ggsave(
  "figures/fg2.tif", 
  width = 9, 
  height = 6
)

rm(alpha, beta, df)

# FIGURE A1: Experts with Extreme Shift and stretch ----------------------------

# Alphas and Betas by respondent
bam_i <- cbind(
  as.data.frame(rstan::extract(bam_fit, c('alpha'))) %>% 
    sapply(., quantile, probs = 0.5), 
  as.data.frame(rstan::extract(bam_fit, c('beta'))) %>% 
    sapply(., quantile, probs = 0.5)
) %>% 
  as.data.frame() %>% 
  rename("alpha" = "V1", "beta" = "V2") %>% 
  mutate(obs = row_number()) %>% 
  merge(
    df_bam %>% 
      group_by(region_id, country_id, country, expert_id) %>% 
      summarise(obs = first(obs)), 
    ., 
    by = "obs"
  )
  
# Party Positions
## Extract Positions 
bam_pp <- ex_fun(bam_fit, "Z")
## Merge positions with countries and regions IDs
bam_z <- merge(
  df_bam %>% 
    group_by(pnum) %>% 
    summarise(party_id = first(party), party_id = gsub("lrecon_", "", party_id)), 
  df_expert_party %>% 
    group_by(region_id, country, party_id, party) %>% 
    summarise(lrecon = mean(lrecon, na.rm = TRUE)) %>% 
    ungroup() %>% 
    mutate(
      lrecon = as.numeric(haven::zap_labels(lrecon)),
      lrecon_s = (lrecon-mean(lrecon))/sd(lrecon)
    ), 
  by = "party_id"
) %>%
merge(
  ., 
  bam_pp %>% 
    mutate(pnum = seq(1:nrow(.))) %>% 
    rename(bam_score = 2) %>% 
    select(pnum, bam_score), 
  by = "pnum"
)

# Expert + Stimuli Positions + Perceived
## Function
exp_placement <- function(cc, id) {
  cbind(
    bam_z %>% 
      filter(country == cc) %>% 
      select(country, bam_score),
    bam_i %>% 
      filter(expert_id == id) %>% 
      select(alpha, beta)
  ) %>% 
  mutate(lrecon_e = alpha + beta*bam_score)
}
## Placements and Stimuli
exp_pp <- rbind(
  exp_placement("United States", 7026), # Negative Alpha: United States
  exp_placement("Argentina", 5017), # Positive Alpha: Argentina
  exp_placement("Australia", 7110), # Smallest Beta: Slovakia
  exp_placement("Israel", 8001) # Biggest Beta: Australia
)
## To long
exp_pp_long <- exp_pp %>% 
  pivot_longer(cols = c(bam_score, lrecon_e)) %>% 
  mutate(
    y = ifelse(name == "bam_score", 0.5, -0.5), 
    country = factor(
      country,
      levels = c("United States", "Argentina", "Australia", "Israel")),
    type = ifelse(name == 'bam_score', "Stimuli", "Perception"),
    type = factor(type, levels = c("Stimuli", "Perception"))
  )

# Labels
e_labs <- exp_pp_long %>%
  select(country, alpha, beta) %>%
  group_by(country) %>%
  summarise_all(first) %>%
  mutate(
    label_a = paste0(expression("alpha =="), round(alpha, 3)),
    label_b = paste0(expression("beta =="), trunc(beta * 1000) / 1000)
  )

# Plots: Alpha
expert_alpha <- ggplot(
  exp_pp_long %>%
    filter(country == "United States" | country == "Argentina"),
  aes(x = value, y = y)
) +
  geom_point(aes(shape = type, color = type), size = 2) +
  scale_color_grey(start = 0, end = .5) +
  scale_y_continuous(limits = c(-1, 1)) +
  geom_vline(xintercept = 0, linetype = "dotted") +
  scale_x_continuous(limits = c(-2.2, 2.2)) +
  labs(y = expression(alpha)) +
  facet_grid(~ country) +
  theme_ches() +
  geom_text(
    data = e_labs %>% 
      filter(country == "United States" | country == "Argentina"), 
    aes(x = 1, y = -1, label = label_a), 
    parse = TRUE,
    hjust = 0, 
    size = 3.25
  )
expert_alpha

# Plots: Beta
expert_beta <- ggplot(
  exp_pp_long %>% 
    filter(country == "Australia" | country == "Israel"), 
  aes(x = value, y = y)
) +
  geom_point(aes(shape = type, color = type), size = 2) +
  scale_color_grey(start = 0, end = .5) +
  scale_y_continuous(limits = c(-1, 1)) +
  geom_vline(xintercept = 0, linetype = "dotted") +
  scale_x_continuous(limits = c(-2.2, 2.2)) +
  labs(y = expression(beta)) +
  facet_grid(~ country) +
  theme_ches() +
  geom_text(
    data = e_labs %>% 
      filter(country == "Australia" | country == "Israel"), 
    aes(x = 1, y = -1, label = label_b), 
    parse = TRUE,
    hjust = 0, 
    size = 3.25
  )
expert_beta

# Join Plots and save
ggpubr::ggarrange(
  expert_alpha, expert_beta, 
  nrow = 2,
  common.legend = TRUE, 
  legend = 'bottom'
) +
  theme(plot.margin = margin(b = 1))

ggsave(
  "figures/fgA1.tif",
  width = 9, 
  height = 7
)  

rm(exp_pp, exp_pp_long, expert_alpha, expert_beta, e_labs)

# Figure 3: Region distributions -----------------------------------------------

# Alpha Estimates
region_alpha <- post_dist(bam_fit, "alpha", "region") %>% 
  mutate(
    region = case_when(
      region_id == 1 ~ "Europe", 
      region_id == 2 ~ "Latin America", 
      region_id == 3 ~ "North America", 
      region_id == 4 ~ "Australia", 
      region_id == 5 ~ "Israel",
      region_id == 6 ~ "North America"
    )
  ) %>% 
  select(-region_id) %>% 
  pivot_longer(-region)

# Beta Estimates
region_beta <- post_dist(bam_fit, "beta", "region") %>% 
  mutate(
    region = case_when(
      region_id == 1 ~ "Europe", 
      region_id == 2 ~ "Latin America", 
      region_id == 3 ~ "North America", 
      region_id == 4 ~ "Australia", 
      region_id == 5 ~ "Israel",
      region_id == 6 ~ "North America"
    )
  ) %>% 
  select(-region_id) %>% 
  pivot_longer(-region)


# PLOT
alpha_post <- region_alpha %>% 
  ggplot(aes(x = value)) +
  geom_density(fill = "grey", alpha = 0.4) +
  geom_vline(aes(xintercept = 0), linetype = "longdash", alpha = 0.3) +
  labs(x = "Alpha Posterior Distribution", y = '')  +  
  scale_x_continuous(limits = c(-0.5, 0.5)) +
  scale_y_continuous(limits = c(0, 40)) +
  facet_grid(vars(region)) +
  theme_ches()

beta_post <- region_beta %>% 
  ggplot(aes(x = value)) +
  geom_density(fill = "grey", alpha = 0.4) +
  geom_vline(aes(xintercept = 1), linetype = "longdash", alpha = 0.3) +
  labs(x = "Beta Posterior Distribution", y = '')  +  
  scale_x_continuous(limits = c(0.5, 1.5)) +
  facet_grid(vars(region)) +
  theme_ches()

# Join Plots and save
ggpubr::ggarrange(alpha_post, beta_post)

ggsave(
  "figures/fg3.tif",
  width = 6,
  height = 8
) 

# Plot interpretation
## alpha
region_alpha %>%  
  group_by(region) %>% 
  summarise(
    min = min(value), 
    mean = mean(value), 
    median = median(value),
    max = max(value)
  )
# Beta
region_beta %>%  
  group_by(region) %>% 
  summarise(
    min = min(value), 
    mean = mean(value), 
    median = median(value),
    max = max(value)
  )
# Party Positions
summary(bam_z$bam_score)
sd(bam_z$bam_score)

rm(region_alpha, region_beta, alpha_post, beta_post)

# FIGURE A3: MEAN ALPHA AND BETA PER COUNTRY ------------------------------------

# Country Estimates
country_means <- cbind(
  post_mean(bam_fit, "alpha", "country"), 
  post_mean(bam_fit, "beta", "country")[, -1]
) %>% 
  merge(
    df_expert %>% 
      select(region_id, country_id, country) %>%
      group_by(region_id, country_id) %>% 
      summarise(country = first(country)), 
    by = "country_id"
  ) %>% 
  mutate(
    region = case_when(
      region_id == 1 ~ "Europe", 
      region_id == 2 ~ "Latin America", 
      region_id == 3 ~ "North America", 
      region_id == 4 ~ "Australia", 
      region_id == 5 ~ "Israel",
      region_id == 6 ~ "North America"
    )
  )

# Plot
alpha_fig <- ggplot(
  country_means, 
  aes(x = alpha_scr, y = reorder(country, alpha_scr), color = region)
) +
  geom_point(size = .7) +
  geom_errorbarh(
    aes(xmax = alpha_upr, xmin = alpha_lwr), 
    height = 0.4
  ) +
  geom_vline(xintercept = 0, linetype = "longdash", alpha = 0.3) +
  scale_x_continuous(limits = c(-0.5, 0.5)) +
  labs(y = '', x = 'Alpha Posterior') +
  scale_colour_grey() +
  theme_ches()


beta_fig <- ggplot(
  country_means, 
  aes(x = beta_scr, y = reorder(country, beta_scr), color = region)
) +
  geom_point(size = .7) +
  geom_errorbarh(
    aes(xmax = beta_upr, xmin = beta_lwr), 
    height = 0.4
  ) +
  geom_vline(xintercept = 1, linetype = "longdash", alpha = 0.3) +
  scale_x_continuous(limits = c(0.5, 1.5)) +
  labs(y = '', x = 'Beta Posterior') +
  scale_colour_grey() +
  theme_ches()

# TOGETHER
ggpubr::ggarrange(
  alpha_fig, beta_fig, 
  common.legend = TRUE, 
  legend = 'bottom'
)  +
  theme(plot.margin = margin(b = 1))

ggsave(
  "figures/fgA3.tif", 
  width = 6,
  height = 8
)

rm(alpha_fig, beta_fig, country_means)

# Figure 4: Linear models predicting alpha and beta ----------------------------

# Merge with expert level info
bam_i <- merge(
  bam_i, 
  df_expert %>% 
    select(expert_id, expert_gender, expert_year, expert_lrgen), 
  by = "expert_id"
)

bam_i$expert_lrgen <- scale(bam_i$expert_lrgen)
bam_i$expert_lrgen_dist <- scale(abs(bam_i$expert_lrgen)) 
bam_i$right_wing <- ifelse(bam_i$expert_lrgen > 0, 1, 0)
# Region
bam_i <- bam_i %>% 
  mutate(
    region = recode(
      as.character(region_id), 
      '1' = 'Europe', 
      '2' = 'Latin America', 
      '3' = 'North America', 
      '4' = 'Australia', 
      '5' = 'Israel', 
      '6' = 'North America'
    ), 
    region = factor(
      region, 
      levels = c('Europe', 'Latin America', 'North America', 'Australia', 'Israel')
    )
  )

# Region dumies
bam_i$eu <- ifelse(bam_i$region_id == 1, 1, 0)
bam_i$la <- ifelse(bam_i$region_id == 2, 1, 0)
bam_i$na <- ifelse(bam_i$region_id == 3 | bam_i$region_id == 6, 1, 0)
bam_i$au <- ifelse(bam_i$region_id == 4, 1, 0)
bam_i$is <- ifelse(bam_i$region_id == 5, 1, 0)
# Age
bam_i$age <- 2022 - bam_i$expert_year
bam_i$age <- ifelse(is.na(bam_i$age), 0, scale(bam_i$age)) # Imputed age with mean if missing
# Female (Non response imputed as Male)
bam_i$female <- 1
bam_i$female[bam_i$expert_gender == 2 & bam_i$region_id == 1] <- 0
bam_i$female[bam_i$expert_gender == 1 & bam_i$region_id == 2] <- 0
bam_i$female[bam_i$expert_gender == 1 & bam_i$region_id == 3] <- 0
bam_i$female[bam_i$expert_gender == 1 & bam_i$region_id == 4] <- 0
bam_i$female[bam_i$expert_gender == 1 & bam_i$region_id == 5] <- 0
bam_i$female[bam_i$expert_gender == 2 & bam_i$region_id == 6] <- 0

# Remove missings (Left right: only 7 cases)
bam_i <- bam_i %>% 
  filter(!is.na(expert_lrgen))

sapply(bam_i, function(x) sum(is.na(x)))

# Models

## Alpha
# Data
stan_data_alpha <- list(
  N = length(bam_i$expert_lrgen), # Number of observations
  K = 7, # Number of predictors
  x = bam_i %>% 
    select(
      expert_lrgen, 
      age, 
      female,
      la, 
      na, 
      au,
      is
    ), # Independent variable 
  y = bam_i$alpha # dependent variable
)

## Model
alpha_lm <- stan(
  "scripts/stan/01_lm.stan", 
  data = stan_data_alpha, 
  seed = 1, 
  chains = 2, 
  iter = 3000
)
saveRDS(alpha_lm, 'outputs/alpha_lm.rds') 

round(summary(alpha_lm)$summary, 2)

## Beta
# Data
stan_data_beta <- list(
  N = length(bam_i$expert_lrgen), # Number of observations
  K = 7, # Number of predictors
  x = bam_i %>% 
    select(
      expert_lrgen_dist, 
      age, 
      female,
      la, 
      na, 
      au,
      is
    ), # Independent variable 
  y = bam_i$beta # dependent variable
)

## Model
beta_lm <- stan(
  "scripts/stan/01_lm.stan", 
  data = stan_data_beta, 
  seed = 1, 
  chains = 2, 
  iter = 2000
)
saveRDS(alpha_lm, 'outputs/beta_lm.rds') 

round(summary(beta_lm)$summary, 2)

# Plots
# Extract Coefficients
model_data <- rbind(
  as.data.frame(rstan::extract(alpha_lm, "beta")) %>% 
    pivot_longer(everything(), names_to = 'var', values_to = 'score') %>% 
    mutate(
      var = recode(
        var, 
        'beta.1' = 'Left-Right',
        'beta.2' = 'Age', 
        'beta.3' = 'Female (ref. Other)',
        'beta.4' = 'Latin America (ref. EU)', 
        'beta.5' = 'North America (ref. EU)', 
        'beta.6' = 'Australia (ref. EU)',
        'beta.7' = 'Israel (ref. EU)'
      ),
      var = factor(
        var, 
        levels = c(
          'Left-Right',
          'Female (ref. Other)',
          'Age',
          'Australia (ref. EU)',
          'Israel (ref. EU)',
          'Latin America (ref. EU)', 
          'North America (ref. EU)'
        )
      ), 
      model = 'Alpha'
    ),
  as.data.frame(rstan::extract(beta_lm, "beta")) %>% 
    pivot_longer(everything(), names_to = 'var', values_to = 'score') %>% 
    mutate(
      var = recode(
        var, 
        'beta.1' = 'Left-Right',
        'beta.2' = 'Age', 
        'beta.3' = 'Female (ref. Other)',
        'beta.4' = 'Latin America (ref. EU)', 
        'beta.5' = 'North America (ref. EU)', 
        'beta.6' = 'Australia (ref. EU)',
        'beta.7' = 'Israel (ref. EU)'
      ),
      var = factor(
        var, 
        levels = c(
          'Left-Right',
          'Female (ref. Other)',
          'Age',
          'Australia (ref. EU)',
          'Israel (ref. EU)',
          'Latin America (ref. EU)', 
          'North America (ref. EU)'
        )
      ), 
      model = 'Beta'
    )
)

# PLOT 
ggplot(model_data, aes(x = score)) +
  geom_density(fill = "grey", alpha = 0.4, bw = 0.008, adjust = 1.5) + #adjust = 1.5, 
  scale_x_continuous(limits = c(-0.35, 0.35)) +
  geom_vline(xintercept = 0, linetype = "longdash", alpha = 0.3) +
  facet_grid(
    rows = vars(var),
    cols = vars(model),
    scales = "free",
    switch = 'y'
  ) +
  labs(x = "Coefficients Posterior Distribution", y = '') +
  theme_ches() +
  theme(
    strip.text.y.left = element_text(
      angle = 0, # Horizontal text
      hjust = 0
    ),
    axis.text.y = element_blank()
  ) 

ggsave(
  "figures/fg4.tif", 
  width = 6, 
  height = 8
)

round(summary(alpha_lm)$summary, 2)
round(summary(beta_lm)$summary, 2)

summary(bam_i$alpha)

rm(
  stan_data_alpha, 
  stan_data_beta, 
  alpha_lm, 
  beta_lm
)

# Figure 5: CHES VS BAM --------------------------------------------------------

# Create extreme cases
bam_z <- bam_z  %>% 
  mutate(
    party_name = paste0(country, ": ", party), 
    extreme = ifelse(
      bam_score < quantile(bam_score, probs = 0.011) |
      bam_score > quantile(bam_score, probs = 0.991), 1, 0
    ),
    region = case_when(
      region_id == 1 ~ "Europe", 
      region_id == 2 ~ "Latin America", 
      region_id == 3 ~ "North America", 
      region_id == 4 ~ "Australia", 
      region_id == 5 ~ "Israel",
      region_id == 6 ~ "North America"
    )
  )

# Extreme cases plot
plot_bam_ex <- ggplot(data = bam_z, aes(x = lrecon, y = bam_score, color = region)) +
  geom_point(alpha = 0.8) +
  ggrepel::geom_text_repel(
    data = bam_z[bam_z$extreme == 1, ], # Low values
    aes(label = party_name), 
    color = 'black', 
    size = 2
  ) +
  scale_color_grey(start = 0, end = .8) +
  labs(x = 'CHES Left-Right Economic', y = 'BAM Left-Right Economic') +
  theme_ches()
plot_bam_ex 

# US, UK and Germany parties
bam_z <- bam_z %>%
  mutate(
    us = ifelse(
      country == 'United States' & party != "Romney" & party != "Sanders" & party != "AOC", 
      1, 
      0
    ), 
    eu = ifelse(
      party_name == 'UK: CONS' |
      party_name == 'UK: LAB' |
      party_name == 'Germany: CDU' |
      party_name == 'Germany: SPD' |
      party_name == 'Germany: FDP' |
      party_name == 'Germany: GRUNEN' |
      party_name == 'Germany: LINKE', 
      1, 
      0
    ),
    us_eu = ifelse(
      us == 1, "United States", 
      ifelse(eu == 1, "Europe", "Other")
    ), 
    us_eu = factor(us_eu, levels = c("United States", "Europe", "Other"))
  ) 


plot_bam_us <- ggplot(bam_z, aes(x = lrecon, y = bam_score, color = us_eu)) +
  geom_point(alpha = 0.6) +
  ggrepel::geom_text_repel(
    data = bam_z[bam_z$us_eu != "Other",], # Low values
    aes(label = party_name),
    color = 'black',
    size = 2
  ) +
  scale_y_continuous(limits = c(-2, 2)) +
  scale_x_continuous(limits = c(0, 10)) +
  scale_color_manual(values = c("grey0", "grey30", "grey90")) +
  labs(x = 'CHES Left-Right Economic', y = 'BAM Left-Right Economic') +
  theme_ches()
plot_bam_us

ggarrange(
  plot_bam_ex, 
  plot_bam_us, 
  ncol = 1, 
  labels = c("a)", "b)")
)

ggsave(
  "figures/fg5.tif", 
  width = 12,
  height = 7.5
)

# Figure 6: Differences distributions ------------------------------------------

bam_z$bam_1 <- (bam_z$bam_score - min(bam_z$bam_score))/(max(bam_z$bam_score) - min(bam_z$bam_score))
bam_z$lrecon_1 <- (bam_z$lrecon - min(bam_z$lrecon))/(max(bam_z$lrecon) - min(bam_z$lrecon))
bam_z$diff <- abs(bam_z$bam_1 - bam_z$lrecon_1)

ggplot(bam_z, aes(x = diff)) +
  geom_density(adjust = 1.5, fill = "grey", alpha = 0.5) +
  scale_x_continuous(limits = c(0, 1)) +
  geom_vline(aes(xintercept = sd(lrecon_1))) +
  labs(y = "", x = "Scaled Absolute Difference between Left-Right Economic and BAM Estimates") +
  theme(
    legend.title = element_blank(), # Delete legend title
    legend.key = element_rect(fill = NA, color = NA), # Delete grey annoying thing. 
    legend.box.background = element_rect(colour = "black"), #, fill = "transparent"
    legend.background = element_blank(), # If not it messes with the rectangle. 
    legend.position = "bottom", 
    panel.background = element_rect(fill = 'white', colour = 'white'),
    panel.grid.major = element_line(colour = "grey95"),
    panel.grid.minor = element_line(colour = "grey95"),
    axis.ticks.y = element_blank(),
    axis.ticks.x = element_blank(),
    strip.background = element_rect(fill = "white")
  )
ggsave(
  "figures/fg6.tif",  
  width = 8, 
  height = 5
)

summary(bam_z$diff) 
quantile(bam_z$diff, .95)

# Figure 7: PREDICTING DIFFERENCES ---------------------------------------------

# Merge differences with expert data
bam_z <- bam_z %>% 
  group_by(country) %>% 
  mutate(
    lrecon_sd = sd(lrecon, na.rm = TRUE), 
    placements_n = n()
  ) 

# Add number of experts
bam_iz <- merge(
  bam_z, 
  bam_i %>% 
    group_by(country) %>% 
    summarise(experts_n = n()), 
  by = "country"
)

# Vote Share
vs <- rbind(
  #Europe
  read.csv("data/vote_share/1999-2019_CHES_dataset_means(v3).csv") %>% 
    filter(year == 2019) %>% 
    select(party_id, vote) %>% 
    mutate(vote_share = vote/100) %>% 
    select(party_id, vote_share),
  #Europe Missing
  read.csv("data/vote_share/missing_vote_europe.csv") %>% 
    mutate(vote_share = vote/100) %>% 
    select(party_id, vote_share),
  # Latin America
  read.csv("data/vote_share/elections_and_vparty.csv") %>% 
    select(party_id, house_votes, president_vote_1) %>% 
    mutate(
      vote_share = ifelse(!is.na(house_votes), house_votes, president_vote_1), 
      vote_share = ifelse(!is.na(vote_share), vote_share, 0)
    ) %>% 
    select(party_id, vote_share),
  # Other places
  readxl::read_excel("data/vote_share/vote_seats_aus_can_isr.xlsx") %>% 
    mutate(vote_share = vote/100) %>% 
    select(party_id, vote_share)
) %>% 
mutate(vote_share = ifelse(is.na(vote_share), 0, vote_share)) 

vs <- vs[!duplicated(vs), ]

bam_iz <- merge(
  bam_iz, 
  vs, 
  by = "party_id", 
  all.y = FALSE
)

bam_iz$lrecon_dist <- abs(bam_iz$lrecon)

# Region dumies
bam_iz$eu <- ifelse(bam_iz$region_id == 1, 1, 0)
bam_iz$la <- ifelse(bam_iz$region_id == 2, 1, 0)
bam_iz$na <- ifelse(bam_iz$region_id == 3 | bam_iz$region_id == 6, 1, 0)
bam_iz$au <- ifelse(bam_iz$region_id == 4, 1, 0)
bam_iz$is <- ifelse(bam_iz$region_id == 5, 1, 0)


lapply(bam_iz, function(x) sum(is.na(x)))

stan.data <- list(
  N = length(bam_iz$diff), # Number of observations
  K = 9, # Number of predictors
  x = bam_iz %>% 
    select(
      lrecon_dist, 
      lrecon_sd,
      experts_n,
      placements_n,
      vote_share,
      la, 
      na, 
      au,
      is
    ), # Independent variable 
  y = bam_iz$diff # dependent variable
)

names(bam_iz)

## Model
bam_diff <- stan(
  "scripts/stan/01_lm.stan", 
  data = stan.data, 
  seed = 1, 
  chains = 2, 
  iter = 2000
)
saveRDS(bam_diff, 'outputs/bam_diff.rds') 

round(summary(bam_diff)$summary, 4)

model_data <- as.data.frame(rstan::extract(bam_diff, "beta")) %>% 
  pivot_longer(everything(), names_to = 'var', values_to = 'score') %>% 
  mutate(
    var = recode(
      var, 
      'beta.1' = 'Left-Right (dist. center)',
      'beta.2' = 'Left-Right SD', 
      'beta.3' = 'Number of Experts', 
      'beta.4' = 'Number of Placements',
      'beta.5' = "Vote Margin",
      'beta.6' = 'Latin America (ref. EU)', 
      'beta.7' = 'North America (ref. EU)', 
      'beta.8' = 'Australia (ref. EU)',
      'beta.9' = 'Israel (ref. EU)'
    ),
    var = factor(
      var, 
      levels = c(
        'Left-Right (dist. center)',
        'Left-Right SD',
        'Number of Experts', 
        'Number of Placements',
        'Vote Margin',
        'Australia (ref. EU)',
        'Israel (ref. EU)',
        'Latin America (ref. EU)', 
        'North America (ref. EU)'
      )
    )
  )

# PLOT 
ggplot(model_data, aes(x = score)) +
  geom_density(fill = "grey", alpha = 0.4) +
  scale_x_continuous(limits = c(-0.06, 0.06)) +
  geom_vline(xintercept = 0, linetype = "longdash", alpha = 0.3) +
  facet_grid(
    rows = vars(var),
    scales = "free",
    switch = 'y'
  ) +
  labs(x = "Coefficients Posterior Distribution", y = '') +
  theme_ches() +
  theme(
    strip.text.y.left = element_text(
      angle = 0, # Horizontal text
      hjust = 0
    ), # Text Left aligned
    axis.text.y = element_blank()
  ) # Eliminate ticks labels
ggsave(
  "figures/fg7.tif",
  width = 5, 
  height = 7
)

# Figure A4 and A5: Mean Squared Error ------------------------------------------------

# MCMC Chains
alpha_it <- cbind(
  df_expert %>% 
    ungroup() %>% 
    select(region_id, country_id, country) %>% 
    mutate(
      region = recode(
        as.character(region_id),
        '1' = 'Europe',
        '2' = 'Latin America',
        '3' = 'North America',
        '4' = 'Australia',
        '5' = 'Israel',
        '6' = 'North America'
      )
    ),
  rstan::extract(bam_fit, "alpha") %>% as.data.frame(.) %>% t(.)
)

beta_it <- cbind(
  df_expert %>% 
    ungroup() %>% 
    select(region_id, country_id, country) %>% 
    mutate(
      region = recode(
        as.character(region_id),
        '1' = 'Europe',
        '2' = 'Latin America',
        '3' = 'North America',
        '4' = 'Australia',
        '5' = 'Israel',
        '6' = 'North America'
      )
    ),
  rstan::extract(bam_fit, "beta") %>% as.data.frame(.) %>% t(.)
)

# Regional within and between
al_re_var <- sapply(alpha_it[5:8004], bw_var, x = alpha_it$region)  %>% 
  t() %>% 
  as.data.frame()
be_re_var <- sapply(beta_it[5:8004], bw_var, x = beta_it$region) %>% 
  t() %>% 
  as.data.frame()

# LA and Europe within and between variance
## Subset
alpha_la <- alpha_it %>% 
  filter(region == "Latin America")
alpha_eu <- alpha_it %>% 
  filter(region == "Europe")
beta_la <- beta_it %>% 
  filter(region == "Latin America")
beta_eu <- beta_it %>% 
  filter(region == "Europe")

## variances
al_la_var <- sapply(alpha_la[5:8004], bw_var, x = alpha_la$country)  %>% 
  t() %>% 
  as.data.frame()
al_eu_var <- sapply(alpha_eu[5:8004], bw_var, x = alpha_eu$country)  %>% 
  t() %>% 
  as.data.frame()
be_la_var <- sapply(beta_la[5:8004], bw_var, x = beta_la$country)  %>% 
  t() %>% 
  as.data.frame()
be_eu_var <- sapply(beta_eu[5:8004], bw_var, x = alpha_eu$country)  %>% 
  t() %>% 
  as.data.frame()

# Alpha quantiles
alpha_aov <- rbind(
  var_to_quan(al_re_var, "Region"),
  var_to_quan(al_la_var, "Latin America"),
  var_to_quan(al_eu_var, "Europe")
) %>% 
  mutate(
    type = factor(type, levels = c("Region", "Europe", "Latin America")), 
    variance = recode(variance, "between" = "Between Group", "within" = "Within Group")
  ) %>% 
  mutate(coef = "Alpha")
# Beta Quantiles
beta_aov <- rbind(
  var_to_quan(be_re_var, "Region"),
  var_to_quan(be_la_var, "Latin America"),
  var_to_quan(be_eu_var, "Europe")
) %>% 
  mutate(
    type = factor(type, levels = c("Region", "Europe", "Latin America")), 
    variance = recode(variance, "between" = "Between Group", "within" = "Within Group")
  ) %>% 
  mutate(coef = "Beta")

# Combine
aov <- rbind(alpha_aov, beta_aov)

ggplot(aov %>% filter(type == "Region"), aes(y = point, x = variance)) +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_errorbar(
    aes(ymin = lwr, ymax = upr),
    width = 0.1,
    position = position_dodge(width = 0.5)
  ) +
  scale_color_grey(start = 0, end = .6) +
  labs(x = "", y = "Mean Squared Error") +
  theme_ches() +
  facet_wrap(vars(coef))

ggsave(
  "figures/fgA4.tif",
  width = 9, 
  height = 6
)

ggplot(aov %>% filter(type != "Region"), aes(y = point, x = type, color = variance)) +
  geom_point(position = position_dodge(width = 0.5)) +
  geom_errorbar(
    aes(ymin = lwr, ymax = upr),
    width = 0.1,
    position = position_dodge(width = 0.5)
  ) +
  scale_color_grey(start = 0, end = .6) +
  labs(x = "", y = "Mean Squared Error") +
  theme_ches() +
  facet_wrap(vars(coef))

ggsave(
  "figures/fgA5.tif",
  width = 9, 
  height = 6
)

# Save session information for reproducibility
writeLines(capture.output(sessionInfo()), "outputs/session_info/02_figures.txt")